# libraries

# Attach the packages used throughout this script. Each package is installed
# only when it is not already available, so re-running the script does not
# re-download (or try to overwrite) packages that are already installed.

# packcircles: circle-packing layout for the category bubble chart
if (!requireNamespace("packcircles", quietly = TRUE)) {
  install.packages("packcircles")
}
library(packcircles)

# ggplot2: all plotting
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)

# viridis: provides the magma() colour palette
if (!requireNamespace("viridis", quietly = TRUE)) {
  install.packages("viridis")
}
library(viridis)

# readxl: read_excel() for the input spreadsheet
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}
library(readxl)

# Load the spreadsheet and discard rows whose Size or Rating is a placeholder.
google <- read_excel("googleplaystore1.xlsx")

# dplyr supplies %>% and the data verbs used below; install only when missing.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)

# Turn placeholder values into NA. which() ignores comparisons that are
# themselves NA, so existing missing values are left untouched.
google$Size <- replace(
  google$Size,
  which(google$Size == "Varies with device"),
  NA
)
google$Rating <- replace(google$Rating, which(google$Rating == "NaN"), NA)

# na.omit() removes every row containing an NA in ANY column, not only in
# Size or Rating. A single pass after both replacements keeps the same rows
# as running it once after each replacement.
google <- na.omit(google)

# Grouped view of the data by category (not referenced by the code visible
# below; kept so that anything relying on it continues to work).
google_category <- google %>% group_by(Category)

# Map each install-bucket label to its numeric lower bound. Labels that are
# not listed here (and NA) become NA, exactly as an unmatched label did in
# the previous chain of nested ifelse() calls.
install_buckets <- c(
  "1+" = 1,
  "5+" = 5,
  "10+" = 10,
  "50+" = 50,
  "100+" = 100,
  "500+" = 500,
  "1,000+" = 1000,
  "5,000+" = 5000,
  "10,000+" = 10000,
  "50,000+" = 50000,
  "100,000+" = 100000,
  "500,000+" = 500000,
  "1,000,000+" = 1000000,
  "5,000,000+" = 5000000,
  "10,000,000+" = 10000000,
  "50,000,000+" = 50000000,
  "100,000,000+" = 100000000,
  "500,000,000+" = 500000000,
  "1,000,000,000+" = 1000000000
)
# Character indexing matches names exactly; unname() drops the label names so
# the column is a plain numeric vector.
google$Installs <- unname(install_buckets[as.character(google$Installs)])

# Review counts are coerced to numeric so they can be averaged later.
google$Reviews <- as.numeric(google$Reviews)
# Number of apps in each category, largest category first.
category <- google %>%
  filter(!is.na(Category)) %>%
  group_by(Category) %>%
  summarise(n = n(), .groups = "drop") %>%
  arrange(desc(n))
View(category)

# Mean review count per category, highest first.
Category_review <- google %>%
  filter(!is.na(Category), !is.na(Reviews)) %>%
  group_by(Category) %>%
  summarise(mean_review = mean(Reviews), .groups = "drop") %>%
  arrange(desc(mean_review))

# Mean install count per category, highest first. Rows without a Type are
# excluded as well.
Category_install <- google %>%
  filter(!is.na(Installs), !is.na(Category), !is.na(Type)) %>%
  group_by(Category) %>%
  summarise(mean_install = mean(Installs), .groups = "drop") %>%
  arrange(desc(mean_install))

View(google)

# Frequency chart by genre (packed circles)
# One row per category: its label and its app count.
data <- data.frame(group = category[["Category"]], value = category[["n"]])

# Lay the circles out. sizetype = "area" makes each circle's area (rather than
# its radius) proportional to value; the layout columns are appended to data
# so the labels can be positioned with the same x/y.
packing <- circleProgressiveLayout(data[["value"]], sizetype = "area")
data <- cbind(data, packing)
# Polygon vertices for drawing each circle (50 points per circle).
dat.gg <- circleLayoutVertices(packing, npoints = 50)

ggplot() +
  # Circles first, so the labels are drawn on top of them.
  geom_polygon(
    data = dat.gg,
    mapping = aes(x, y, group = id, fill = as.factor(id)),
    colour = "black",
    alpha = 0.6
  ) +
  geom_text(
    data = data,
    mapping = aes(x, y, size = value, label = group)
  ) +
  scale_fill_manual(values = magma(nrow(data))) +
  scale_size_continuous(range = c(1, 4)) +
  coord_equal() +
  theme_void() +
  theme(legend.position = "none")
View(Category_install)

###########
# Compare mean install counts and mean review counts by genre
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
library(tidyverse)
summary(Category_install$mean_install)
summary(Category_review$mean_review)

View(Category_review)

# geom_col() draws bars whose length is the mapped y value. The previous
# geom_bar() (no y aesthetic) counted rows instead, so every category got a
# bar of length 1 and the means were visible only through the fill colour.
# Columns are referenced by bare name inside aes(); using df$col there
# bypasses ggplot2's data masking.
result1 <- ggplot(
  data = Category_install,
  aes(
    x = reorder(Category, mean_install),
    y = mean_install,
    fill = mean_install
  )
) +
  geom_col() +
  coord_flip()

result2 <- ggplot(
  data = Category_review,
  aes(
    x = reorder(Category, mean_review),
    y = mean_review,
    fill = mean_review
  )
) +
  geom_col() +
  coord_flip()


result1 + labs(x="Genre",y="Installs Count",title = "Installs by genre")
result2 + labs(x="Genre",y="Review Count",title = "Reviews by genre")



#####################
# Compare install counts and app size by genre
summary(google$Size)

google1 <- google
# Snapshot of the raw Size strings; not used by the code visible below, kept
# in case anything else relies on it.
x <- google1$Size

# NOTE(review): assumes sizes below one megabyte carry a "k" suffix meaning
# kilobytes — confirm against the spreadsheet. Such values used to be coerced
# to NA (and dropped from the means) because only "M" was stripped. If no
# such values exist, this handling is a no-op.
size_is_kb <- grepl("k$", google1$Size)
google1$Size <- gsub('M',"" ,google1$Size)
View(google1)
size_mb <- as.numeric(sub("k$", "", google1$Size))
# Express kilobyte sizes in megabytes so every value shares one unit.
size_mb[size_is_kb] <- size_mb[size_is_kb] / 1024
google1$Size <- size_mb
is.numeric(google1$Size)

# Prepare data: mean installs and mean size per category, largest size first
Category_install_size <- google1 %>% 
  filter(!is.na(Size) & !is.na(Category) & !is.na(Installs)) %>% 
  group_by(Category) %>% 
  summarise(mean_install = mean(Installs) , mean_size = mean(Size)) %>%
  arrange(desc(mean_size))

View(Category_install_size)

# Create graph: bar length is mean size, categories are ordered by mean
# installs. Columns are referenced by bare name inside aes() rather than
# df$col, which bypasses ggplot2's data masking.
result3 <- ggplot(
  data = Category_install_size,
  aes(x = reorder(Category, mean_install), y = mean_size)
) +
  geom_col() +
  coord_flip()
result3 + labs(x="Genre",y="Size And Install",title = "Size by genre")

# Re-read the untouched spreadsheet for inspection. readxl is made available
# before read_excel() is called, and is installed only when it is missing.
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}
library(readxl)
google5 <- read_excel("googleplaystore1.xlsx")
View(google5)
